Topics:

**if time

## [1] "Last knit to html at:  2015-10-06 18:14:21"

Review

What we learned last time:

  1. Navigate your data frames, using square-bracket and “$” notation.
  2. Use logical statements to subset and clean data.
  3. Exporting data
  4. Formatting data for ggplot
  5. Changing ggplot themes

Exercises(0):

0.1 Set your working directory to the Session4 folder.

0.2 Import the “CallinPollen.csv” dataset, and call it “flr”.

0.3 Make sure the “ggplot2” and “grid” libraries is loaded with e.g. “library(ggplot2)”. If you’re having trouble, use “install.packages(”ggplot2“)” before loading the library.

0.4 Use ggplot to plot a scatterplot like this:

 ggplot(flr, aes(x = shakeNumber, y = slope)) + 
      geom_point() + 
      theme_bw()
      

# 0.1
setwd("/Users/callinswitzer/Dropbox/Harvard/RByCall/RClassMaterials/Session4")

# 0.2
library(ggplot2)
library(grid) # we need this for the "unit" function

# 0.3
flr <- read.csv("Datasets/CallinPollen.csv")

# 0.4
p1 <- ggplot(flr, aes(x = shakeNumber, y = slope)) + 
     geom_point() + 
     theme_bw()
p1

1. Visualization with ggplot

Coloring / shaping points based on a factor

# read in data
crs <- read.csv("Datasets/mtcars.txt", sep = " ")

# plot it
ggplot(crs, aes(x = wt, y = mpg)) + 
     geom_point()

# color points, based on the number of cylinders
ggplot(crs, aes(x = wt, y = mpg)) + 
     geom_point(aes(color = cyl))

# same thing
ggplot(crs, aes(x = wt, y = mpg, color = cyl)) + 
     geom_point()

# we don't want a spectrum of colors, so we'll make crs$cyl into a factor
crs$cyl <- as.factor(crs$cyl)

ggplot(crs, aes(x = wt, y = mpg)) + 
     geom_point(aes(color = cyl))

# change color scheme
ggplot(crs, aes(x = wt, y = mpg)) + 
     geom_point(aes(fill= cyl), color = "black",  shape = 21, size = 4) + 
     theme_classic() + 
     scale_fill_manual(values = c("black", "grey50", "white"))

# color points, based the cyl being greater than 4
Cyl.Gr.4 <- as.numeric(as.character(crs$cyl)) > 4
ggplot(crs, aes(x = wt, y = mpg)) + 
     geom_point(aes(fill= Cyl.Gr.4), color = "black",  shape = 21, size = 4) + 
     theme_classic() + 
     scale_fill_manual(values = c("black", "white"))

# size points, based on a factor
ggplot(crs, aes(x = wt, y = mpg)) + 
     geom_point(aes(size = Cyl.Gr.4), color = "black", fill = "grey",  shape = 21) + 
     theme_classic() + 
     scale_size_manual(values = c(1, 10))

# you can do the same thing with shape
ggplot(crs, aes(x = wt, y = mpg)) + 
     geom_point(aes(shape = Cyl.Gr.4), color = "black", fill = "grey", size = 4) + 
     theme_classic() + 
     scale_shape_manual(values = c(22, 21), name = "Num. Cyl. \n    > 4")


Exercises(1):

1.1 With the cars dataset, use ggplot to make a scatterplot of x = disp, y = mpg. Color the points differently, based on the “am” column (automatic transmission). Make sure that “am” is a factor or logical. Feel free to change the size, theme, etc.

ggplot(crs, aes(x = disp, y = mpg)) + 
     geom_point(aes(color = as.factor(am)), size = 4) + 
     theme_classic() + 
     scale_color_brewer(palette = "Paired", name = "Automatic Tranny")


Making boxplots

################################  EXAMPLE 1 ################################ 
# making a boxplot 
################################  EXAMPLE 1 ################################ 

# here's a plot that looks weird as points
ggplot(flr, aes(x = shakeNumber, y = slope)) + 
     geom_point()

# say we think that shake number should be represented with a boxplot, rather than with dots
p2 <- ggplot(flr, aes(x = shakeNumber, y = slope)) + 
     geom_boxplot()
p2 # not quite right

p3 <- ggplot(flr, aes(x = as.factor(shakeNumber), y = slope)) + 
     geom_boxplot()
p3 # much better

# log transform 
p3 + scale_y_log10()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

################################  EXAMPLE 2 ################################
# changing the order of the boxes in a plot
################################  EXAMPLE 2 ################################

# a basic boxplot
p4 <- ggplot(flr, aes(x = trt, y = int)) + 
     geom_boxplot()
p4

# changing order of the boxes -- change levels of a factor
str(flr$trt) # factor with levels alphabetized
##  Factor w/ 9 levels "AC","AC and dehumidifier",..: 4 4 4 4 4 4 4 4 4 4 ...
# we can change the order of the factor levels to change the plot
levels(flr$trt)
## [1] "AC"                            "AC and dehumidifier"          
## [3] "AC set to 62 overnight"        "Ambient"                      
## [5] "dehumidifier"                  "heat and humidifier"          
## [7] "heat and humidifier in alcove" "heat set to 80F overnight"    
## [9] "Humidifier"
newlevels <- c('heat and humidifier in alcove', 'heat and humidifier', 'AC', 'AC and dehumidifier', 'AC set to 62 overnight', 'heat set to 80F overnight', 'Ambient', 'dehumidifier', 'Humidifier')
flr$trt <- factor(flr$trt, levels =  newlevels)

p5 <- ggplot(flr, aes(x = trt, y = int)) + 
     geom_boxplot() + 
     theme(axis.text.x = element_text(angle = 45, hjust = 1), # make text 45 degrees
           plot.margin = unit(c(2,1,1,1), "cm")) # move the left margin over
p5

# we can order the factor levels based on their means
(trtMeans <- tapply(X = flr$int, FUN = mean, INDEX = flr$trt))
## heat and humidifier in alcove           heat and humidifier 
##                    0.05134368                    0.06338917 
##                            AC           AC and dehumidifier 
##                    0.14195595                    0.09301399 
##        AC set to 62 overnight     heat set to 80F overnight 
##                    0.12963241                    0.05722544 
##                       Ambient                  dehumidifier 
##                    0.09773810                    0.08195999 
##                    Humidifier 
##                    0.14091004
# sort the treatments
(trtMeans.sort <- trtMeans[order(trtMeans, decreasing = TRUE)])
##                            AC                    Humidifier 
##                    0.14195595                    0.14091004 
##        AC set to 62 overnight                       Ambient 
##                    0.12963241                    0.09773810 
##           AC and dehumidifier                  dehumidifier 
##                    0.09301399                    0.08195999 
##           heat and humidifier     heat set to 80F overnight 
##                    0.06338917                    0.05722544 
## heat and humidifier in alcove 
##                    0.05134368
names(trtMeans.sort) # sorted vector, based on mean
## [1] "AC"                            "Humidifier"                   
## [3] "AC set to 62 overnight"        "Ambient"                      
## [5] "AC and dehumidifier"           "dehumidifier"                 
## [7] "heat and humidifier"           "heat set to 80F overnight"    
## [9] "heat and humidifier in alcove"
# change the factor levels     
flr$trt <- factor(flr$trt, levels = names(trtMeans.sort))

p6.ordered <- ggplot(flr, aes(x = trt, y = int)) + 
     geom_boxplot(fill = 'black', alpha = 0.5, outlier.colour = NA) + # don't show outliers
     theme_bw() + 
     theme(axis.text.x = element_text(angle = 45, hjust = 1), 
           panel.grid.major = element_blank(), 
           panel.grid.minor = element_blank(), 
           plot.margin = unit(c(2,1,1,1), "cm")) + 
     labs(x = "Treatment", y = "Pollen release (integration method)")

p6.ordered 

p6.ordered + geom_point() # add points to our plot

p7 <- p6.ordered + geom_point(position = position_jitter(width = 0.1, height = 0), alpha = 0.5)
p7

# save the plot
ggsave("boxpltTrt.pdf", plot = p7,  device = cairo_pdf, width = 89, height = 60, unit = "mm")
file.remove("boxpltTrt.pdf") # remove plot 
## [1] TRUE
################################  EXAMPLE 3 ################################
# changing the order of the bars in a plot
################################  EXAMPLE 3 ################################
surv <- read.csv("Datasets/survey.csv")

# map values to shorter words
library(plyr)
surv$Rexp <- mapvalues(surv$Rexp, from = as.character(levels(surv$Rexp)), to = c("Instar", "Adult", "Egg", "Pupa"))

ggplot(surv, aes(x = Rexp)) + 
     geom_bar()  + 
     theme_minimal()

# set new levels
surv$Rexp <- factor(surv$Rexp, levels = c("Egg", "Instar", "Pupa", "Adult"))

# replot
ggplot(surv, aes(x = Rexp)) + 
     geom_bar() + 
     theme_minimal()


Exercises(1, cont’):

1.2 Read in (redo it, if you’ve read it in earlier) the surv.csv file. Use these commands to clean your data:

 # makes a new, small data frame that contains only the same levels as the Rexp column
 # drops factor levels with 0 items
 s.sm <- droplevels(surv[surv$BestExp %in% surv$Rexp, ]) 
 
 # maps values to one-word, instead of long strings
 library(plyr)
 s.sm$BestExp <- mapvalues(s.sm$BestExp, from = as.character(levels(s.sm$BestExp)), to = c("Instar", "Adult", "Egg", "Pupa"))
 

Make a barplot of the s.sm $ BestExp. Change the order of the bars so that they go like this: Egg, Instar, Pupa, Adult

surv <- read.csv("Datasets/survey.csv")

s.sm <- droplevels(surv[surv$BestExp %in% surv$Rexp, ])


library(plyr)
s.sm$BestExp <- mapvalues(s.sm$BestExp, from = as.character(levels(s.sm$BestExp)), to = c("Instar", "Adult", "Egg", "Pupa"))


ggplot(s.sm, aes(x = BestExp)) + 
     geom_bar() + 
     theme_minimal()

# change order of bars

s.sm$BestExp <- factor(s.sm$BestExp, levels = c("Egg", "Instar", "Pupa", "Adult"))

# replot
ggplot(s.sm, aes(x = BestExp)) + 
     geom_bar() + 
     theme_minimal()


Adding lines to ggplot

# plot horsepower vs quarter second time
ggplot(crs, aes(x = hp, y = qsec)) + 
     geom_point()

# add a loess (local regression) line
ggplot(crs, aes(x = hp, y = qsec)) + 
     geom_point() + 
     geom_smooth(method = "loess")

# add least squares line, without standard errors
ggplot(crs, aes(x = hp, y = qsec)) + 
     geom_point() + 
     geom_smooth(method = "lm", se = FALSE)

# same thing
ggplot(crs, aes(x = hp, y = qsec)) + 
     geom_point() + 
     stat_smooth(method = "lm", se = FALSE)

# same thing, specifying formula
ggplot(crs, aes(x = hp, y = qsec)) + 
     geom_point() + 
     stat_smooth(method = "lm", se = FALSE, formula = y~log(x) )


Exercises(1, cont’):

1.3 With the mtcars.txt data, use ggplot to plot x = drat, y = hp. Add a line to your plot. Feel free to change the line color, theme, point color, etc.

ggplot(crs, aes(x = drat, y = hp)) + 
     geom_point(aes(color = cyl), size = 4) + 
     geom_smooth(method = "lm", se = FALSE, color = "seashell3", size = 3) + 
     theme_classic()

2. Loops and apply functions

################################  EXAMPLE 1 ################################
# for and while loops
################################  EXAMPLE 1 ################################
# for loops repeat a specific number of times
for(i in 1:10){
     print(i + 100)
}
## [1] 101
## [1] 102
## [1] 103
## [1] 104
## [1] 105
## [1] 106
## [1] 107
## [1] 108
## [1] 109
## [1] 110
# make fake data
df1 <- data.frame(person = c("bob", "sue", "jack", "ann"), number1 = c(4,5,6,1), number2 = c(2,4,6,6))
df1
##   person number1 number2
## 1    bob       4       2
## 2    sue       5       4
## 3   jack       6       6
## 4    ann       1       6
# another for loop
for(ii in df1$person){
    print(rep(ii, times = df1[df1$person == ii, "number1"]))
}
## [1] "bob" "bob" "bob" "bob"
## [1] "sue" "sue" "sue" "sue" "sue"
## [1] "jack" "jack" "jack" "jack" "jack" "jack"
## [1] "ann"
# another for loop to make lots of plots
for(foo in c("wt", "hp", "mpg", "qsec", "disp")){
     hist(crs[ , foo], main = foo)
     rug(crs[ ,foo])
}

## while loops repeat until something happens
x <- 1
while(x < 10){
     print(x + 100) 
     x <- x + 1
}
## [1] 101
## [1] 102
## [1] 103
## [1] 104
## [1] 105
## [1] 106
## [1] 107
## [1] 108
## [1] 109

Exercises(2):

2.1 Write a for loop to print the numbers 1:100

for(i in 1:100){
     print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
## [1] 51
## [1] 52
## [1] 53
## [1] 54
## [1] 55
## [1] 56
## [1] 57
## [1] 58
## [1] 59
## [1] 60
## [1] 61
## [1] 62
## [1] 63
## [1] 64
## [1] 65
## [1] 66
## [1] 67
## [1] 68
## [1] 69
## [1] 70
## [1] 71
## [1] 72
## [1] 73
## [1] 74
## [1] 75
## [1] 76
## [1] 77
## [1] 78
## [1] 79
## [1] 80
## [1] 81
## [1] 82
## [1] 83
## [1] 84
## [1] 85
## [1] 86
## [1] 87
## [1] 88
## [1] 89
## [1] 90
## [1] 91
## [1] 92
## [1] 93
## [1] 94
## [1] 95
## [1] 96
## [1] 97
## [1] 98
## [1] 99
## [1] 100

################################  EXAMPLE 2 ################################
# if/else
################################  EXAMPLE 2 ################################

# you can use "if" and "else" inside loops to do different things

for(i in 1:10){
     if(i > 5) print(i + 100)
     else print("hello")
}
## [1] "hello"
## [1] "hello"
## [1] "hello"
## [1] "hello"
## [1] "hello"
## [1] 106
## [1] 107
## [1] 108
## [1] 109
## [1] 110
# same
for(i in 1:10){
     if(i > 5){
         print(i + 100) 
     } 
     else{
          print("hello")
     } 
}
## [1] "hello"
## [1] "hello"
## [1] "hello"
## [1] "hello"
## [1] "hello"
## [1] 106
## [1] 107
## [1] 108
## [1] 109
## [1] 110
# example with data frame
for(nums in 1:length(df1$number1)){
     print(nums)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
for(nums in 1:length(df1$number1)){
     print(as.character(df1$person[nums]))
}
## [1] "bob"
## [1] "sue"
## [1] "jack"
## [1] "ann"
for(nums in 1:length(df1$number1)){
     if(as.character(df1$person[nums]) == "sue"){
          print("sue was here")
     }
     else if(as.character(df1$person[nums]) == "bob"){
          print("bob was here")
     }
     else print("ann or jack")
}
## [1] "bob was here"
## [1] "sue was here"
## [1] "ann or jack"
## [1] "ann or jack"
# example of making a new vector
vec1 <- rep(NA, nrow(df1))
for(nums in 1:length(df1$number1)){
     if(as.character(df1$person[nums]) == "sue"){
          vec1[nums] <- ("sue was here")
     }
     else if(as.character(df1$person[nums]) == "bob"){
          vec1[nums] <- ("bob was here")
     }
     else vec1[nums] <- ("ann or jack")
}

vec1
## [1] "bob was here" "sue was here" "ann or jack"  "ann or jack"

Exercises(2, cont’):

2.1 Write one for loop for the numbers 1:100.

  • If the number is > 25, print “low”
  • If the number is >= 25 and <= 75, print “middle”
  • If the number is > 75, print “High”
for(i in 1:100){
     if (i < 25 ) print("low")
     else if (i >= 25 & i <= 75) print("middle")
     else print("high")
}
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "low"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "middle"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"
## [1] "high"

Extra Practice

Write a for loop that cycles through the columns of the mtcars.txt dataset (called “crs”, earlier), and makes a histogram, only if the column is numeric.

for(cols in 1:length(colnames(crs))){
     if(is.numeric(crs[, cols])) hist(crs[, cols], main = names(crs[cols]), col = "red")
}


Apply functions

################################  EXAMPLE 2 ################################
# apply functions
################################  EXAMPLE 2 ################################

# here's a simple function
x2 <- function(x) x^2

x2(5)
## [1] 25
# here is a function
trimMean <- function(vect) {
     qs <- quantile(vect, probs = c(.10, .90))
     mean(vect[vect > qs[1] & vect < qs[2]])
}

nums <- c(1:100, 1000)
nums2 <- c(50:100, 1500, 1)
nums3 <- c(1,3,5,6,7,7,5,4,34,.76,87,8,8,8,6,5,4.4,4,56,7,7,88,33)

# make a new list, from our numbers
listNums <- list(nums, nums2, nums3)

trimMean(nums)
## [1] 51
trimMean(nums2)
## [1] 75
trimMean(nums3)
## [1] 9.317647
# trimMean(listNums) doesn't work
# trimMean(nums, nums2) #doesn't work
trimMean(c(nums, nums2, nums3)) # doesn't give three different numbers
## [1] 54.32624
# we could write a for loop
listMNS <- numeric(length(listNums))
for(i in 1:length(listNums)){
     listMNS[i] <- trimMean(listNums[[i]])
}
listMNS
## [1] 51.000000 75.000000  9.317647
# or we could use apply
sapply(listNums, FUN = trimMean)
## [1] 51.000000 75.000000  9.317647
## Apply functions are often faster than loops

# this for loop is slow, because we grow the newVec variable on every iteration of the loop
# note: loop isn't needed here, you could just use 1:30000 + 100 to get a new vector.
system.time({
     newVec <- numeric()
     for(ii in 1:30000){
          newVec[ii] <- ii + 100
     }
})
##    user  system elapsed 
##   1.099   0.341   1.442
head(newVec)
## [1] 101 102 103 104 105 106
## sapply function (for illustration only....sapply is not necessary here)
# first, we write a function for what we want to happen
x100 <- function(x) x + 100


# apply that function to a vector
system.time({
     newVec.sa <- sapply(X = 1:100000, FUN = x100)
})
##    user  system elapsed 
##   0.063   0.001   0.064
head(newVec.sa)
## [1] 101 102 103 104 105 106
## you can speed up a for-loop by pre-allocating space
system.time({

     newVec <- numeric(100000)
     for(ii in 1:100000){
          newVec[ii] <- ii + 100
     }
})
##    user  system elapsed 
##   0.092   0.001   0.093
## you can use apply functions on a data frame

# say you want to calculate the trimmed mean (removing the max and min) for several columns in your data frame

tMean <- function(x){
     x <- x[x != min(x, na.rm = TRUE) & x != max(x, na.rm = TRUE)]
     return(mean(x, na.rm=TRUE))
}

tMean(c(1:100, 10000, NA, NA))
## [1] 51
mean(c(1:100, 10000, NA), na.rm =TRUE)
## [1] 149.0099
# you could use a for loop
tms <- numeric()
for(ii in c("temp", "humidity", "int", "slope")){
     tms[ii] <- tMean(flr[, ii])
}

tms
##         temp     humidity          int        slope 
## 25.302283105 55.290476190  0.094796074  0.008759418
# or you could use apply
# apply allows you to choose the margin -- I think it converts to a matrix first
apply(X = flr[, c("temp", "humidity", "int", "slope")], MARGIN = 2, FUN = tMean)
##         temp     humidity          int        slope 
## 25.302283105 55.290476190  0.094796074  0.008759418
# same thing
sapply(X = flr[, c("temp", "humidity", "int", "slope")], FUN = tMean)
##         temp     humidity          int        slope 
## 25.302283105 55.290476190  0.094796074  0.008759418

Exercises(2, cont’):

2.2 The following function prints the max or the highest level of a factor.

maxHighF <- function(thing) {
     if(is.numeric(thing)) return(max(thing, na.rm = TRUE))
     else if(is.factor(thing)) return(levels(thing)[length(levels(thing))])
     else return(NA)
}


maxHighF(crs$hp) # works
## [1] 335
maxHighF(flr$trt) # works
## [1] "heat and humidifier in alcove"
maxHighF(flr[, c("trt", "humidity")]) # doesn't work
## [1] NA

Apply this function to every column of the flower data frame, using sapply

sapply(flr, FUN = maxHighF)
##                       flowerNum                             trt 
##                           "332" "heat and humidifier in alcove" 
##                        daysOpen                        distance 
##                             "4"                            "12" 
##                     shakeNumber                            temp 
##                             "8"                          "33.8" 
##                        humidity                           notes 
##                            "83"                  "weird petals" 
##                dropFromAnalysis                            file 
##                             "y"        "332_30.8_45_7 08062014" 
##                pxToMmConversion                     firstPollen 
##                   "36.18223984"                           "704" 
##          firstPollenOutOfScreen                     blankFrames 
##                           "908"                          "1211" 
##                           slope                             int 
##                   "0.132515913"                   "0.385708641" 
##                        aperture                           focus 
##                            "16"                          "1:15" 
##                       frameRate                         shutter 
##                          "1000"                    "\"1/2000\"" 
##                       amplitude                            freq 
##                          "0.38"                           "279" 
##                             sec                           plant 
##                     "0.1792115"                        "sml001" 
##                        openTime                       DateOfExp 
##                     "overnight"                        "8/6/14"

Writing your own functions

# functions take in something, do something, then output something

# example 1
printNum.plus5 <- function(num){
     print(num + 5)
}


printNum.plus5(5)
## [1] 10
printNum.plus5(1000)
## [1] 1005
# example 2
plotWithAxisLabels <- function(x1, y1){
     plot(x = x1, y = y1, xlab = "Here is x1", ylab = "Here is y1", 
          bty = "l", pch = 20)
}

plotWithAxisLabels(x1 = flr$distance, y1 = flr$slope)

# example 3 with sapply
# fake data
df1 <- data.frame(person = c("bob", "sue", "jack", "ann"), number1 = c(4,5,6,1), number2 = c(2,4,6,6))  
df1
##   person number1 number2
## 1    bob       4       2
## 2    sue       5       4
## 3   jack       6       6
## 4    ann       1       6
# want to convert to long format
longerizer1 <- function(name){
     rep(name, times = df1[df1$person == name, "number1"])
}

longerizer1("bob")
## [1] "bob" "bob" "bob" "bob"
df1
##   person number1 number2
## 1    bob       4       2
## 2    sue       5       4
## 3   jack       6       6
## 4    ann       1       6
longerizer1("sue")
## [1] "sue" "sue" "sue" "sue" "sue"
longerizer1(c("bob", "sue"))
## [1] "bob" "bob" "bob" "bob" "sue" "sue" "sue" "sue" "sue"

Exercises(3):

Write a function that takes a column name from the flr dataframe as an argument. If the column is numeric, make a scatterplot of x = that column, y = int, with points colored red. If the column is a factor, then make a boxplot of x = column, y = int, where the boxes are filled with blue.

pFunct <- function(colNm){
     if (is.numeric(flr[, colNm])) plot(y = flr$int, x = flr[, colNm], col = "red")
     else if (is.factor(flr[, colNm])) plot(y = flr$int, x = flr[, colNm], col = "blue")
}

par(mfrow = c(1,2))
pFunct(colNm = "trt")

pFunct(colNm = "temp")

# pFunct(colNm = "rrkkr") causes an error

# here's a way to not have the eror crash your script
tryCatch(pFunct(bob), error = function(e) NA)
## [1] NA

Extra Practice:

E.1 Read in the “loblolly.txt” dataset, if you haven’t already. Call it “lob”. Use ggplot() to plot a scatterplot (geom_point()) with x= age, y = height.

lob <- read.table("datasets/loblolly.txt", sep = " ", header = TRUE)
head(lob)
##   height age Seed
## 1   4.51   3  301
## 2  10.89   5  301
## 3  28.72  10  301
## 4  41.74  15  301
## 5  52.70  20  301
## 6  60.92  25  301
ggplot(lob, aes(x = age, y = height)) + 
     geom_point()

E.2 Using the “lob” dataframe, convert “age” to a factor.

 lob$age <- as.factor(lob$age)

Now make a boxplot where x = age, and y = height with ggplot()

lob$age <- as.factor(lob$age)

ggplot(lob, aes(x = age, y = height)) + 
     geom_boxplot()

E.3 Using the “lob” dataframe, add points to the boxplot from 3.7. Advanced change the color of the points, based on the “Seed” variable from the “lob” dataframe (you may want to convert it to a factor first)

seed <- as.factor(lob$Seed)

ggplot(lob, aes(x = age, y = height)) + 
     geom_boxplot() + 
     geom_point(aes(color = seed))

E.3 Change the theme from your previous plot to the “classic” theme with theme_classic().

ggplot(lob, aes(x = age, y = height)) + 
     geom_boxplot() + 
     geom_point(aes(color = seed)) + 
     theme_classic()